#!/usr/bin/env python3
# vim: tabstop=4 shiftwidth=4 expandtab

import re
import time
import urllib.parse
from dataclasses import dataclass
from datetime import datetime
from typing import Optional

import feedparser

from gemini_antenna.URLHelper import URLHelper

# Teach urllib that gemini:// URLs support relative-reference resolution and
# carry a network location, so urljoin()/urlparse() treat them like http(s).
urllib.parse.uses_relative.append("gemini")
urllib.parse.uses_netloc.append("gemini")

# Matches a single whitespace character (including newlines and tabs).
whitespace_re = re.compile(r'\s')

@dataclass
class FeedEntry:
    """A single entry parsed from a gemsub or RSS/Atom feed."""
    feedurl: str  # resolved URL of the feed this entry came from
    author: str   # entry or feed author (feed title used as fallback)
    updated: int  # last-updated time, seconds since the Unix epoch
    title: str    # entry title (gemsub: link label, or the date if absent)
    link: str     # absolute URL of the entry

@dataclass
class TwtxtEntry:
    """A single status ("twt") parsed from a twtxt feed."""
    feedurl: str  # resolved URL of the feed this twt came from
    author: str   # feed nick, or the feed URL when no nick is declared
    posted: float # posting time, seconds since the Unix epoch
    twt: str      # the status text itself

def _cw(text: str) -> str:
    """ Collapse whitespace """
    return whitespace_re.sub(' ', text)

def parsegemsub(feed: str, baseurl: str) -> Optional[list[FeedEntry]]:
    """
    Parse a gemsub (Gemini subscription) document into feed entries.

    :param feed: Feed contents.
    :param baseurl: Feed URL; stored as each entry's ``feedurl`` and used to
        absolutize relative links.
    :returns: List of entries (possibly empty), or ``None`` when the feed has
        no ``# ...`` heading to use as the author.
    """
    baseurl = URLHelper.resolve(baseurl)

    # The first "# ..." heading names the feed/author; without one the feed
    # is unusable, mirroring the gemsub companion spec's required title line.
    authormatch = re.search(r'^#\s*([^#\r\n]+)', feed, re.MULTILINE)
    if not authormatch:
        return None
    author = authormatch[1]

    entries = []
    # "=> <url> YYYY-MM-DD <optional title>" link lines.
    entriespattern = r'^=>\s*(\S+)\s+(\d{4}-\d{2}-\d{2})[^\r\n\S]*([^\r\n]*)'
    for linkpart, datestr, titlepart in re.findall(entriespattern, feed,
                                                   re.MULTILINE):
        # Date-only stamps get an arbitrary noon time of day, then become a
        # unix timestamp (int seconds).
        try:
            updated = int(datetime.strptime(datestr + " 12:00:00",
                                            "%Y-%m-%d %H:%M:%S").timestamp())
        except (ValueError, OverflowError, OSError):
            # Invalid calendar date (e.g. 2021-02-30) or out-of-range epoch.
            continue
        # Gemsub feeds often use relative links; absolutize against the feed URL.
        link = urllib.parse.urljoin(baseurl, linkpart)
        # Fall back to the date string when the link has no label.
        entries.append(FeedEntry(baseurl, author, updated,
                                 titlepart or datestr, link))
    return entries

def parsetwtxt(feed: str, baseurl: str) -> list[TwtxtEntry]:
    """
    Parse a twtxt document into entries.

    :param feed: Feed contents.
    :param baseurl: Feed URL; stored as each entry's ``feedurl`` and used as
        the author when the feed declares no ``# nick = ...`` metadata.
    :returns: List of entries (possibly empty).
    """
    baseurl = URLHelper.resolve(baseurl)

    # Optional "# nick = <name>" metadata line names the author.
    nickmatch = re.search(r'^#\s*nick\s*=\s*(\S+)', feed, re.MULTILINE)
    author = nickmatch[1] if nickmatch else baseurl

    entries = []
    # Naive "<timestamp>\t<text>" match; lines whose timestamp fails to parse
    # as ISO-8601-with-offset are simply dropped below.
    for stamp, twt in re.findall(r'^(\S+)\t([^\r\n]+)', feed, re.MULTILINE):
        try:
            posted = int(datetime.strptime(stamp,
                                           "%Y-%m-%dT%H:%M:%S%z").timestamp())
        except (ValueError, OverflowError, OSError):
            # Malformed timestamp or out-of-range epoch: skip this twt.
            continue
        entries.append(TwtxtEntry(feedurl=baseurl, author=author,
                                  posted=posted, twt=twt))
    return entries

def parsexml(feed: str, baseurl: str) -> Optional[list[FeedEntry]]:
    """
    Parse an RSS/Atom document (via feedparser) into feed entries.

    :param feed: Feed contents.
    :param baseurl: Feed URL; stored as each entry's ``feedurl`` and used to
        absolutize relative links.
    :returns: List of entries (possibly empty), or ``None`` when feedparser
        produced no 'entries' key at all.
    """
    baseurl = URLHelper.resolve(baseurl)
    scheme = urllib.parse.urlparse(baseurl).scheme
    entries = []
    parsedfeed = feedparser.parse(feed)

    # Feed-level author name, falling back to the feed title.
    # NOTE: dict.has_key() was removed in Python 3 — membership tests must
    # use the "in" operator.
    feedmeta = parsedfeed['feed']
    feedauthor = None
    if 'author_detail' in feedmeta and 'name' in feedmeta['author_detail']:
        feedauthor = _cw(feedmeta['author_detail']['name'])
    if not feedauthor and 'title' in feedmeta:
        feedauthor = _cw(feedmeta['title'])

    if 'entries' not in parsedfeed:
        return None

    for entry in parsedfeed['entries']:
        try:  # The feed could miss all sorts of fields...
            # Entry author beats feed author; no author at all -> skip entry.
            if 'author_detail' in entry and 'name' in entry['author_detail']:
                author = _cw(entry['author_detail']['name'])
            elif feedauthor:
                author = feedauthor
            else:
                continue
            # Seconds since epoch (time.mktime interprets the struct_time
            # in local time, matching the original behavior).
            updated = int(time.mktime(entry['updated_parsed']))
            title = _cw(entry['title'])
            link = None
            if len(entry['links']) > 1:
                # Several links: prefer the first whose scheme matches the
                # feed's own (e.g. gemini:// over http://).
                for linkobj in entry['links']:
                    href = urllib.parse.urljoin(baseurl, linkobj['href'])
                    if urllib.parse.urlparse(href).scheme == scheme:
                        link = href
                        break
            else:
                link = urllib.parse.urljoin(baseurl, _cw(entry['link']))
            if not link:
                continue
        except Exception:
            # Deliberate best-effort: entries missing required fields are
            # skipped rather than failing the whole feed.
            continue
        entries.append(FeedEntry(baseurl, author, updated, title, link))
    return entries
